# Clean up
rm(list = ls())

# Set working directory; please set your own here
setwd("~/Dropbox/Fragmentation/fragmentation_replication_bjps/")

library(tidyverse)
library(foreign)
library(cowplot)

# Import data with main analyses
data <- read.dta("Results/analyses_final.dta")

# Subset relevant results
coefficients_data <- data %>%
  filter(Sample == "Whole") %>%
  filter(id == "No. parties just above")  %>%
  filter(Outcome != "Index government fractionalization") 

# Plot coefficients (panel A)
coefficients_plot <- coefficients_data %>%
  mutate(Sample = ifelse(optimal == 1, "Main model", "Robustness checks")) %>%
  ggplot(aes(x = Effect)) + 
  geom_density(alpha = 0.4, aes(color = Sample, fill = Sample)) +
  scale_fill_manual(values = c("black", "green")) +
  scale_color_manual(values = c("black", "green4")) +
  theme_bw( ) +
  theme(legend.position = "none") +
  ylab("Density") + 
  xlab("Coefficient (standardized)") + 
  coord_cartesian(xlim = c(-0.5, 0.5)) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed", size = 1)
coefficients_plot

# Plot p-values (panel B)
pvalues_plot <- coefficients_data %>%
  filter(id == "No. parties just above")  %>%
  filter(outcome_type != "Summary measures") %>%
  filter(Outcome != "Index government fractionalization")  %>%
  mutate(Sample = ifelse(optimal == 1, "Main model", "Robustness checks")) %>%
  ggplot(aes(x = pval)) + 
  geom_density(alpha = 0.4, aes(color = Sample, fill = Sample)) +
  scale_fill_manual(values = c("black", "green")) +
  scale_color_manual(values = c("black", "green4")) +
  theme_bw( ) +
  theme(legend.position = c(0.5, 0.2)) +
  scale_y_continuous(limits = c(0, 1.6)) +
  ylab("Density") + xlab("p-value") + 
  geom_vline(xintercept = 0.05, color = "red", linetype = "dashed", size = 1)
pvalues_plot

# Plot first and second stage (panel C)
first_second_plot <- coefficients_data %>%
  mutate(abs_coef = abs(Effect)) %>%
  filter(id == "No. parties just above")  %>%
  filter(outcome_type != "Summary measures") %>%
  filter(Outcome != "Index government fractionalization")  %>%  
  mutate(Identification = ifelse(id == "Treatm status of closest parties", 
                                 "Treatm status of closest parties", "No. parties just above")) %>%
  ggplot(aes(x = fstat, y = abs_coef)) +
  geom_point(alpha = 0.1) +
  coord_cartesian(ylim = c(0, 1)) +
  geom_smooth(method = "loess", color = "red", span = 2, se = T) +
  labs(x = "F-statistic in the first stage", y = "Absolute coefficient size in the second stage (standardized)") +
  scale_colour_manual(values = c("black", "blue")) +
  theme_bw() +
  guides(colour = guide_legend(override.aes = list(alpha = 1)))
first_second_plot

# Put the three plots together
allmodels_plot <- ggdraw() +
  draw_plot(coefficients_plot, 0, .5, .5, .5) +
  draw_plot(pvalues_plot, .5, .5, .5, .5) +
  draw_plot(first_second_plot, 0, 0, 1, .5) +
  draw_plot_label(c("A", "B", "C"), c(0, 0.5, 0.02), c(1, 1, 0.53), size = 15)
allmodels_plot

# Save final plot
ggsave("Plots/fig5.png", plot = allmodels_plot, width = 25, height = 22.5, units = "cm") 

# Check the mean and std deviation of coefficients and p-values
mean(coefficients_data$Effect, na.rm = T)
sd(coefficients_data$Effect, na.rm = T)
mean(coefficients_data$pval, na.rm = T)
sd(coefficients_data$pval, na.rm = T)